# clear
rm(list=ls())

options(warn=0)

# libs
library(haven)
library(data.table)
library(lubridate)
library(tidyverse)

outdate <- gsub("-","_",today())

# read data
X <- data.table(read.csv("01_InData/ltg_forecasts.csv"))
X<-na.omit(X,cols="TICKER")
IBES_CRSP<-data.table(read.csv("01_InData/IBES_CRSP_link_table.csv"))
X<-X[, max_anntims :=max(ANNTIMS),by=c("ANALYS", "TICKER","ANNDATS")]
X<-X[ANNTIMS==max_anntims,]
X<-X[, ANNDATS:=as.Date(as.character(ANNDATS), "%Y%m%d")]


# Merge with recommendations
Recs<- data.table(read.csv("01_InData/analyst_recommendations.csv"))

# Only consider the last recommendation of the day
Recs <- Recs[order(AMASKCD,TICKER, ANNDATS, ANNTIMS)]
Recs<-Recs[, max_anntims :=max(ANNTIMS),by=c("AMASKCD", "TICKER","ANNDATS")]
Recs<-Recs[ANNTIMS==max_anntims,]
Recs<-Recs[, ANNDATS:=as.Date(as.character(ANNDATS), "%Y%m%d")]


# Merge 
X_Recs<-
  merge(
    X,
    Recs,
    by.x=c("ANALYS", "TICKER"),
    by.y=c("AMASKCD", "TICKER"),
    allow.cartesian=TRUE
  )

# Want to keep the forecasts matched with the most recent recommendation of the analyst
setnames(X_Recs, "ANNDATS.y", "ANNDATS")
setnames(X_Recs, "ANNDATS.x", "anndats_revision")

# Recommendation date is before the revision date
X_Recs<-X_Recs[ANNDATS<anndats_revision,]
X_Recs<-X_Recs[, max_ANNDATS :=max(ANNDATS),by=c("ANALYS", "TICKER", "anndats_revision")]
X_Recs<-X_Recs[max_ANNDATS==ANNDATS,]

#Determine the last forecast per analyst, firm, year
X_Recs<-X_Recs[, anndats_year := format(anndats_revision, "%Y")]
X_Recs<-X_Recs[, max_anndats_revision := max(anndats_revision), by=c("ANALYS", "TICKER", "anndats_year")]
X_Recs<-X_Recs[anndats_revision==max_anndats_revision,]
X_Recs<-X_Recs[IRECCD<3, good_mean := mean(VALUE), by=c("TICKER", "anndats_year")]
X_Recs<-X_Recs[IRECCD<3, good_median := median(VALUE), by=c("TICKER", "anndats_year")]
X_Recs<-X_Recs[IRECCD>3, bad_mean := mean(VALUE), by=c("TICKER", "anndats_year")]
X_Recs<-X_Recs[IRECCD>3, bad_median := median(VALUE), by=c("TICKER", "anndats_year")]
X_Recs<-X_Recs[IRECCD==3, neutral_mean := mean(VALUE), by=c("TICKER", "anndats_year")]
X_Recs<-X_Recs[IRECCD==3, neutral_median := median(VALUE), by=c("TICKER", "anndats_year")]

X_Recs<-X_Recs[, good_mean1 :=max(good_mean, na.rm=TRUE), by=c("TICKER", "anndats_year")]
X_Recs<-X_Recs[, good_median1 :=max(good_median, na.rm=TRUE), by=c("TICKER", "anndats_year")]
X_Recs<-X_Recs[, bad_mean1 :=max(bad_mean, na.rm=TRUE), by=c("TICKER", "anndats_year")]
X_Recs<-X_Recs[, bad_median1 :=max(bad_median, na.rm=TRUE), by=c("TICKER", "anndats_year")]
X_Recs<-X_Recs[, neutral_mean1 :=max(neutral_mean, na.rm=TRUE), by=c("TICKER", "anndats_year")]
X_Recs<-X_Recs[, neutral_median1 :=max(neutral_median, na.rm=TRUE), by=c("TICKER", "anndats_year")]

X_Recs<-unique(X_Recs, by=c("TICKER","anndats_year"))


# Determine the revision size    
list1<-c("good_mean1","good_median1", "bad_mean1", "bad_median1", "neutral_mean1", "neutral_median1")

X_Recs<-X_Recs[order(TICKER,anndats_revision)]
for (v in (1:length(list1))){
  cmdstr1<-paste0("X_Recs[, lag1_",list1[v]," := shift(", list1[v],", n=1, type='lag'), by=list(TICKER)]")
  eval(parse(text=cmdstr1))
  cmdstr1<-paste0("X_Recs[, revision_",list1[v],":=", list1[v],"-lag1_",list1[v],"]")
  eval(parse(text=cmdstr1))
}



# Merge with actuals
Actuals <- data.table(read.csv("01_InData/actuals.csv"))
Actuals[, FPEDATS:=PENDS]

Actuals<-Actuals[PDICITY=="ANN",]



Actuals<-unique(Actuals, by=c("TICKER","FPEDATS"))

# Calculate ltg-actuals
Actuals[, actual:=VALUE]
Actuals<-Actuals[order(TICKER, FPEDATS)]



# Adjust for stock splits
Actuals<-
  merge(
    Actuals,
    IBES_CRSP,
    by=c("TICKER")
  )
Actuals<-Actuals[ANNDATS>=sdate & ANNDATS<=edate,]
Actuals[, ANNDATS := as.Date(as.character(ANNDATS), "%Y%m%d")]
Actuals[, FPEDATS := as.Date(as.character(FPEDATS),"%Y%m%d")]
Actuals[, year := format(FPEDATS, "%Y")]
Actuals<-na.omit(Actuals, cols="PERMNO")

CRSP_daily <- data.table(read.csv("01_InData/crsp.csv"))
CRSP_daily[, date := as.Date(as.character(date), "%Y%m%d")]

Actuals<-
  merge(
    Actuals,
    CRSP_daily,
    by.x=c("PERMNO","ANNDATS"),
    by.y=c("PERMNO", "date"),
    all.x=TRUE
  )
Actuals[, actual_adj:=actual/CFACSHR]

Actuals<-Actuals[order(TICKER, FPEDATS)]


for (v in (3:5)){
  cmdstr1<-paste0("Actuals[, lead",v,"_actual := shift(actual_adj, n=",v,",type='lead'),by=list(TICKER)]")
  eval(parse(text=cmdstr1))
  cmdstr1<-paste0("Actuals[, ltg_",v," := (((lead",v,"_actual-actual_adj)/abs(actual_adj)+1)^(1/",v,")-1)*100]")
  eval(parse(text=cmdstr1))
}

Actuals<-Actuals[, max_fpedats :=max(FPEDATS, na.rm=TRUE), by=c("TICKER", "year")]
Actuals<-Actuals[max_fpedats==FPEDATS, ]
Actuals<-unique(Actuals, by=c("TICKER","FPEDATS"))


# Merge the forecast revisions with actuals
Forecasts_act<-
  merge(
  X_Recs,
  Actuals,
  by.x = c("TICKER", "anndats_year"),
  by.y = c("TICKER", "year")
)


# Determine the y variable
for (a in (1:length(list1))){
for (v in (3:5)){
  cmdstr1<-paste0("Forecasts_act<-Forecasts_act[, forecast_error",v,"_",list1[a],":= ltg_",v,"-", list1[a],"]")
  eval(parse(text=cmdstr1))
}
}
Forecasts_act <- Forecasts_act %>% 
   select(-matches("(.x|.y)$"))

write_dta(Forecasts_act, path="02_OutData/forecasts_act_ltg_cons.dta")


